Differential

TLS DE analysis

Preamble

Code
options(ggrepel.max.overlaps = Inf)

.volcano <- function(data, name1, name2, fdr=0.1, lfc=2, one_sided=FALSE,
                     fdr_label=fdr, lfc_label=lfc){
  
  data$expression = ifelse(data$FDR < fdr_label & data$logFC >= lfc_label, 'Up',
                     ifelse(data$FDR < fdr_label & data$logFC <= -lfc_label ,'Down', 'Stable')
                     )
  data$gene <- rownames(data)
  if (one_sided){
    DEGs <- subset(data, expression =="Up")
    xaxis <- c(0,NA)
  }else{
    DEGs <- subset(data, expression %in% c("Up", "Down"))
    xaxis <- c(min(data[["logFC"]], na.rm = TRUE) - 1.5, max(data[["logFC"]], na.rm = TRUE) +
    1.5)
  }
  significant <- rownames(DEGs)
  keyvals.colour <- ifelse(data$logFC < -lfc & data$FDR < fdr, "#662965",
                    ifelse(data$logFC > lfc & data$FDR < fdr,"gold1",
                      ifelse(data$FDR >= fdr,"#8F8F8F","steelblue4"
                      )))
  names(keyvals.colour)[keyvals.colour == "#662965"] <- paste0('Up in ', name1)
  names(keyvals.colour)[keyvals.colour == "gold1"] <- paste0('Up in ', name2)
  names(keyvals.colour)[keyvals.colour == "steelblue4"] <- 'Low FC'
  names(keyvals.colour)[keyvals.colour == '#8F8F8F'] <- 'NS'
  
  EnhancedVolcano(data,
    x = "logFC", y = "FDR",
    FCcutoff = lfc, pCutoff = fdr,
    pointSize = 1, raster = TRUE,
    title = NULL, subtitle = NULL,
    lab = data[["gene"]], labSize = 2,
    selectLab = rownames(DEGs),
    xlim = xaxis,
    ylim = c(0,NA),
    xlab = bquote(~Log[2]~ 'FC'),
    ylab = bquote("-"~Log[10]~ 'adj. p-val'),
    colCustom = keyvals.colour,
    maxoverlapsConnectors = Inf,
    drawConnectors = TRUE, widthConnectors = 0.25) +
  guides(col = guide_legend(override.aes = list(alpha = 1, size = 3))) +
  theme_bw(9) +
    theme(#aspect.ratio = 1,
          legend.title = element_blank(),
          legend.position = "top",
          panel.grid.minor = element_blank())
}
Code
library(dplyr)
library(tidyr)
library(edgeR)
library(ggplot2)
library(DT)
library(UpSetR)
library(EnhancedVolcano)
library(cowplot)
library(SingleCellExperiment)
library(ExploreModelMatrix)
library(scater)
library(ggrepel)

Data

Code
spe <- readRDS("../outs/01-spe.rds")

#y <- counts(spe) > 3
#spe <- spe[
#    rowSums(y) > 1,
#    colSums(y) > 1]

# keep features detected in at least 20 spots in any sample
# and spots with at least 200 detected features overall
idx <- split(seq(ncol(spe)), spe$sample_id)
gs <- sapply(idx, \(.) {
    y <- counts(spe[, .]) > 1
    rowSums(y) >= 20
})
spe <- spe[rowAnys(gs), ]
spe <- spe[, colSums(counts(spe) > 0) >= 200]

# filter pat that is not part of the GEOMX data
spe <- spe[,!spe$sample_id %in% "B07_38596"]
spe$sample_id <- as.character(spe$sample_id)
spe$sample_id[spe$sample_id %in% "B06_24137"] <- "B06_24136"
spe$sample_id <- as.factor(spe$sample_id)

spe$anno2 <- as.character(spe$anno2)
spe$anno2 <- gsub("24137", "24136", spe$anno2)
spe$anno2 <- as.factor(spe$anno2)

nan <- is.na(spe$anno2)
spe <- spe[, !nan]
spe$anno2 <- spe$anno2 |> forcats::fct_collapse(
  "NOR" = c("NOR", "TUM"))
spe$anno3 <- spe$anno2
spe$anno3 <- as.character(spe$anno3)
spe$anno3[grep(".*MTLS", spe$anno3)] <- "MTLS"
spe$anno3[grep(".*ETLS", spe$anno3)] <- "ETLS"


# Filter spots
spe <- spe[,!spe$TumorType %in% "LUAD"]
spe <- spe[,!spe$anno1 %in% c("EXCL", "LN", "INFL")]

#Remove non matching anno
spe <- spe[,!spe$anno2 %in% c("INFL", "24784_ETLS57", "EXCL")]

spe$anno3 <- as.factor(spe$anno3)
spe$anno3 <- droplevels(spe$anno3)
spe$anno2 <- droplevels(spe$anno2)
spe$anno1 <- droplevels(spe$anno1)
spe$TumorType <- droplevels(spe$TumorType)
spe$anno3 <- relevel(spe$anno3, ref= "NOR")



spe$anno4 <- as.character(spe$anno2)

spe$anno4[grep("NOR", spe$anno4)] <- paste0(spe$sample_id[grep("NOR", spe$anno4)], "_NOR")

spe$PatTum <- paste0(spe$sample_id, "_", spe$TumorType)
spe$PatTum <- spe$PatTum |> forcats::fct_collapse(
  "1" = c("B04_17776_LSCC", "B05_8527_ccRCC"),
  "2" = c("B05_32288_LSCC", "B06_24784_ccRCC"),
  "3" = c("B06_24136_LSCC", "B07_30616_ccRCC"))

DE analysis model 1

Model1: DE associated with maturation

Code
# Generate pseudo bulk object
pbs <- aggregateAcrossCells(spe, ids=spe$anno4)

design1 <- model.matrix(~ TumorType + TumorType:PatTum + TumorType:anno3, colData(pbs))
#design1 <- design1[,!colnames(design1) %in% "TumorTypeccRCC:PatTum4"]

#Fit model
dgl1 <- DGEList(counts(pbs))
keep <- filterByExpr(dgl1, design1)
#dgl1 <- dgl1[keep, , keep.lib.sizes=FALSE]
dgl1 <- calcNormFactors(dgl1)
dgl1 <- estimateDisp(dgl1, design1, robust=TRUE)
fit1 <- glmQLFit(dgl1, design1, robust=TRUE)
Code
colnames(design1) <- gsub(":", "_", colnames(design1))

# Define different contrasts
contrast_mod1 <- makeContrasts(
  ETLS_vs_par_inLSCC = TumorTypeLSCC_anno3ETLS,
  ETLS_vs_par_inRCC = TumorTypeccRCC_anno3ETLS,
  SFL_TLS_vs_par_inLSCC = TumorTypeLSCC_anno3MTLS,
  SFL_TLS_vs_par_inRCC = TumorTypeccRCC_anno3MTLS,
  ETLS_vs_SFTLS_inLSCC = TumorTypeLSCC_anno3ETLS - TumorTypeLSCC_anno3MTLS,
  ETLS_vs_SFTLS_inRCC = TumorTypeccRCC_anno3ETLS - TumorTypeccRCC_anno3MTLS,
  levels=design1)

DE results model 1

Volcano plots

Model2: Interaction model without patients

Code
design2 <- model.matrix(~ 0 + TumorType:anno3, colData(pbs))


# fit deseq2 GLM model
dgl2 <- DGEList(counts(pbs))
keep <- filterByExpr(dgl2, design2)
#dgl2 <- dgl2[keep, , keep.lib.sizes=FALSE]
dgl2 <- calcNormFactors(dgl2)
dgl2 <- estimateDisp(dgl2, design2, robust=TRUE)
fit2 <- glmQLFit(dgl2, design2, robust=TRUE)
Code
colnames(design2) <- gsub(":", "_", colnames(design2))

# Define different contrasts
contrast_mod2 <- makeContrasts(
  ETLS_inRCC_vs_LSCC = TumorTypeccRCC_anno3ETLS - TumorTypeLSCC_anno3ETLS,
  SFL_TLS_inRCC_vs_LSCC = TumorTypeccRCC_anno3MTLS - TumorTypeLSCC_anno3MTLS,
  par_inRCC_vs_LSCC = TumorTypeccRCC_anno3NOR -TumorTypeLSCC_anno3NOR,
  allTLS_inRCC_vs_LSCC = (TumorTypeccRCC_anno3ETLS + TumorTypeccRCC_anno3MTLS)/2 - (TumorTypeLSCC_anno3ETLS + TumorTypeLSCC_anno3MTLS)/2,
  levels=design2)

DE tab model2

Volcano plots

Compare results GEO/VIS

Code
# DE objects
filenames <- list.files("../outs/DE", pattern="*.csv", full.names=TRUE)
filenam <- list.files("../outs/DE", pattern="*.csv", full.names=FALSE)
filenam <- gsub(".csv$", "", filenam)
de_list <- lapply(filenames, read.csv)
names(de_list) <- filenam


# subset res to de-genes
de_gen_list <- lapply(de_list, function(.){
  if(! "gene" %in% colnames(.)){
      .$gene <- .$X
  }
  .
}) 

mod1_names <- grep("model1_", names(de_gen_list), value = T)
mod2_names <- grep("model2_", names(de_gen_list), value = T)


geo_mod1 <- de_gen_list[mod1_names]
geo_mod2 <- de_gen_list[mod2_names]

names(geo_mod1) <- gsub("model1_", "", names(geo_mod1))
names(geo_mod2) <- gsub("model2_", "", names(geo_mod2))

FC compare

Code
res_mod2 <- lapply(res_mod2, function(res_tab){
  res_tab$gene <- rownames(res_tab)
  res_tab
})

res_mod1 <- lapply(res_mod1, function(res_tab){
  res_tab$gene <- rownames(res_tab)
  res_tab
})

case_sig <- function(FDR1, FDR2, com1, com2, FDR_t){
  case_when(
  FDR1 <= FDR_t & !! FDR2 <= FDR_t ~ "both",
  FDR1 <= FDR_t & !! FDR2 > FDR_t ~ com1,
  FDR1 > FDR_t & !! FDR2 <= FDR_t ~ com2,
  FDR1 > FDR_t & !! FDR2 > FDR_t ~ "none"
)
}


## plotting function
logFC_plot <- function(log_FC_tab,
                       comp1, 
                       comp2, 
                       FDR_t = 0.1,
                       logFC_both = 0.5){
  # subset dataframe
  log_FC_sub <- log_FC_tab |> 
    select(c(logFC,FDR,gene, tech)) |> 
    pivot_wider(names_from = tech, 
                values_from = c(logFC, FDR))
  
  # new colum names
  logFC_comp1 <- grep(paste0("logFC_", comp1), colnames(log_FC_sub), value = T)
  logFC_comp2 <- grep(paste0("logFC_", comp2), colnames(log_FC_sub), value = T)
  FDR_comp1 <- grep(paste0("FDR_", comp1), colnames(log_FC_sub), value = T)
  FDR_comp2 <- grep(paste0("FDR_", comp2), colnames(log_FC_sub), value = T)
  
  # Add sig threshold
  log_FC_sub <- log_FC_sub |> 
    mutate(sig = case_sig(FDR1 = !!sym(FDR_comp1), 
                          FDR2 = !!sym(FDR_comp2), 
                          com1 = comp1, 
                          com2 = comp2, 
                          FDR_t = FDR_t))
  
  # Add genes to highlight
  log_FC_sub <- log_FC_sub |> mutate(
    highlight = case_when(
      sig %in% c("both") & 
        sign(!!sym(logFC_comp1)) == sign(!!sym(logFC_comp2)) ~ gene,
      .default = ""
      )
    ) 
  
  # colors
  color_pal <- c("grey","#fdb462", "#ff6666","#0099cc")
  names(color_pal) <- c("none", "both", comp1, comp2)
  
  # plot
  p <- ggplot(log_FC_sub, aes_string(x=logFC_comp1, 
                       y=logFC_comp2)) + 
    geom_point(aes(colour = sig), size = 0.9, alpha = 0.7) +
    geom_vline(xintercept = 0) +
    geom_hline(yintercept = 0) + 
    #xlim(0, 1) +
    #ylim(0, 1) +
    scale_color_manual(values = color_pal) + 
    theme_bw()
  
  # annotations
  #p + geom_text_repel(
  #  aes(label = highlight),
  #  family = "Poppins",
  #  size = 2,
  #  min.segment.length = 0, 
  #  seed = 42, 
  #  box.padding = 0.5,
  #  max.overlaps = Inf,
  #  arrow = arrow(length = unit(0.005, "npc")),
  #  nudge_x = .15,
  #  nudge_y = .5,
  #  color = "#3D3D3D"
  #)
  p
}

Upset plots

Compare expression

Code
sce <- readRDS(file.path("..","outs", "01-sce.rds"))

sce <- sce[,!sce$TissueSub %in% "LN"]
sce$Origin <- sce$TissueSub |> forcats::fct_collapse(
  "parenchymal" = c("Alveoles", "Kidney", "LSCC", "RCC"))
sce$Origin <- sce$Origin |> droplevels()
sce$Origin <- relevel(sce$Origin, ref="parenchymal")


sce$anno5 <- paste0(sce$PatientID, "_", sce$Origin)
pb_geo <- aggregateAcrossCells(sce, ids=sce$anno5, use.assay.type="logcounts",
                               statistics = "mean")
pb_geo$PatientID <- gsub("^r", "", pb_geo$PatientID)
pb_geo$anno3 <- pb_geo$Origin
levels(pb_geo$anno3) <- list(NOR  = "parenchymal", 
                              ETLS = "E_TLS",
                              MTLS = "SFL_TLS",
                              EXCL = "PFL_TLS",
                              EXCL = "Tcell_TLS")

spe$PatientID <- gsub(".*_", "", spe$sample_id)
spe$anno5 <- paste0(spe$PatientID, "_", spe$anno3)
pb_vis <- aggregateAcrossCells(spe, ids=spe$anno5, use.assay.type="logcounts",
                               statistics = "mean")
#pb_vis <-pb_vis[keep,]

overlap_gene <- intersect(rownames(pb_vis), rownames(pb_geo))
overlap_pat <- intersect(pb_geo$PatientID, pb_vis$PatientID)